{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: tqdm in d:\\program\\anaconda\\envs\\ai\\lib\\site-packages (4.46.0)\n",
      "Requirement already satisfied: lightgbm in d:\\program\\anaconda\\envs\\ai\\lib\\site-packages (2.3.0)\n",
      "Requirement already satisfied: scikit-learn in d:\\program\\anaconda\\envs\\ai\\lib\\site-packages (from lightgbm) (0.22.1)\n",
      "Requirement already satisfied: numpy in d:\\program\\anaconda\\envs\\ai\\lib\\site-packages (from lightgbm) (1.17.4)\n",
      "Requirement already satisfied: scipy in d:\\program\\anaconda\\envs\\ai\\lib\\site-packages (from lightgbm) (1.3.2)\n",
      "Requirement already satisfied: joblib>=0.11 in d:\\program\\anaconda\\envs\\ai\\lib\\site-packages (from scikit-learn->lightgbm) (0.14.1)\n"
     ]
    }
   ],
   "source": [
    "! pip install tqdm\n",
    "! pip install lightgbm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from tqdm import tqdm\n",
    "from sklearn.metrics import mean_squared_error,explained_variance_score\n",
    "from sklearn.model_selection import KFold\n",
    "import lightgbm as lgb\n",
    "import math\n",
    "import os\n",
    "from joblib import Parallel, delayed\n",
    "\n",
    "test_data_path = '../data/A_testData0531.csv'\n",
    "route_order_folder_path = '../data/route_order_data'\n",
    "port_path = '../data/port.csv'\n",
    "result_path = '../result_server_20200624-changed-feature.csv'\n",
    "\n",
    "# import moxing as mox\n",
    "# OBS_RES_PATH =  \"s3://ship-eta/result/result_server_20200622.csv\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [],
   "source": [
    "def format_data_type(data, mode='train'):\n",
    "    if mode=='test':\n",
    "        data['onboardDate'] = pd.to_datetime(data['onboardDate'], infer_datetime_format=True)\n",
    "        data['temp_timestamp'] = data['timestamp']\n",
    "        data['ETA'] = None\n",
    "        data['creatDate'] = None\n",
    "    data['loadingOrder'] = data['loadingOrder'].astype(str)\n",
    "    data['timestamp'] = pd.to_datetime(data['timestamp'], infer_datetime_format=True)\n",
    "    data['longitude'] = data['longitude'].astype(float)\n",
    "    data['latitude'] = data['latitude'].astype(float)\n",
    "    data['speed'] = data['speed'].astype(float)\n",
    "    data['TRANSPORT_TRACE'] = data['TRANSPORT_TRACE'].astype(str)\n",
    "    return data\n",
    "\n",
    "def get_test_data_info(path):\n",
    "    data = pd.read_csv(path) \n",
    "    test_trace_set = data['TRANSPORT_TRACE'].unique()\n",
    "    test_order_belong_to_trace = {}\n",
    "    for item in test_trace_set:\n",
    "        orders = data[data['TRANSPORT_TRACE'] == item]['loadingOrder'].unique()\n",
    "        test_order_belong_to_trace[item] = orders\n",
    "    return format_data_type(data, mode='test'), test_trace_set, test_order_belong_to_trace\n",
    "\n",
    "test_data_origin, test_trace_set, test_order_belong_to_trace = get_test_data_info(test_data_path)\n",
    "\n",
    "def get_port_info():\n",
    "    port_data = {}\n",
    "    test_port_set = set()\n",
    "    for route in test_trace_set:\n",
    "        ports = route.split('-')\n",
    "        test_port_set = set.union(test_port_set, set(ports))\n",
    "    port_data_origin = pd.read_csv(port_path)\n",
    "    for item in port_data_origin.itertuples():\n",
    "        if getattr(item, 'TRANS_NODE_NAME') in test_port_set:\n",
    "            port_data[getattr(item, 'TRANS_NODE_NAME')] = {'LONGITUDE': getattr(item, 'LONGITUDE'),'LATITUDE': getattr(item, 'LATITUDE') }\n",
    "    return port_data\n",
    "port_data = get_port_info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_train_route_order_data(route):\n",
    "    route_order_data_path = os.path.join(route_order_folder_path, \"{}.csv\".format(route))\n",
    "    data = pd.read_csv(route_order_data_path, header=None\n",
    "           , names=['loadingOrder','timestamp','longitude','latitude','speed'])\n",
    "    if (data.shape[0] == 0):\n",
    "        print(\"error == \", route)\n",
    "    data['timestamp'] = pd.to_datetime(data['timestamp'], infer_datetime_format=True)\n",
    "    return data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 90,
   "metadata": {},
   "outputs": [],
   "source": [
    "from math import radians, cos, sin, asin, sqrt\n",
    "def haversine(lon1, lat1, lon2, lat2): # 经度1，纬度1，经度2，纬度2 （十进制度数）\n",
    "    # 将十进制度数转化为弧度\n",
    "    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])\n",
    "    # haversine公式\n",
    "    dlon = lon2 - lon1 \n",
    "    dlat = lat2 - lat1 \n",
    "    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2\n",
    "    c = 2 * asin(sqrt(a)) \n",
    "    r = 6371 # 地球平均半径，单位为公里\n",
    "    return c * r * 1000"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [],
   "source": [
    "def handle_train_data(order_info_set,start_longitude,start_latitude,dest_longitude,dest_latitude):\n",
    "    order_info_set = order_info_set.reset_index(drop=True)\n",
    "    #       获取起航时间\n",
    "    start_time = order_info_set['timestamp'].min()\n",
    "    start_index = 0\n",
    "    for (index, info_item) in order_info_set.iterrows():\n",
    "        if abs(info_item['longitude']-start_longitude) < 0.5 and abs(info_item['latitude']-start_latitude) < 0.5 and info_item['speed'] > 0:\n",
    "            start_time = max(start_time, info_item['timestamp'])\n",
    "            start_index = index\n",
    "            break \n",
    "#       获取到达目的地时间，这里需要用 GPS 判断\n",
    "    end_time = order_info_set['timestamp'].max()\n",
    "    end_index = order_info_set.size-1\n",
    "    for (index, info_item) in order_info_set.iterrows():\n",
    "        if abs(info_item['longitude'] - dest_longitude) < 0.3 and abs(info_item['latitude'] - dest_latitude) < 0.3:\n",
    "            end_time = min(end_time, info_item['timestamp'])\n",
    "            end_index = index\n",
    "            break\n",
    "#         修正起点终点逆序\n",
    "    if (end_time < start_time):\n",
    "        start_time,end_time = end_time,start_time\n",
    "        start_index,end_index = end_index,start_index\n",
    "#         算出航行用时\n",
    "    total_seconds = (end_time - start_time).total_seconds()\n",
    "#         人工截取前 40% 的数据   \n",
    "    order_info_set = order_info_set[start_index:end_index+1]\n",
    "    cut_size = math.ceil(order_info_set.shape[0]*0.4)\n",
    "    order_info_set = order_info_set[0:cut_size]\n",
    "#         截取数据\n",
    "    if (order_info_set.shape[0] > 100):\n",
    "        index = np.linspace(0, order_info_set.shape[0]-1, num=100,dtype=int).tolist()\n",
    "        order_info_set = order_info_set.iloc[index]     \n",
    "#         获取特征\n",
    "    tail_info = order_info_set.tail(1).values.tolist()[0]\n",
    "    head_info = order_info_set.head(1).values.tolist()[0]\n",
    "\n",
    "    dis_to_dest = haversine(tail_info[2],tail_info[3],dest_longitude,dest_latitude)\n",
    "\n",
    "    dis_to_start = haversine(tail_info[2],tail_info[3],start_longitude,start_latitude)\n",
    "\n",
    "    trace_dis = haversine(head_info[2],head_info[3],tail_info[2],tail_info[3])\n",
    "    trace_time=(pd.Timedelta(tail_info[1]-head_info[1])).total_seconds()\n",
    "    if trace_time==0:\n",
    "        trace_time=1\n",
    "    trace_mean_speed=trace_dis*3.6/trace_time\n",
    "\n",
    "    feature_temp = pd.DataFrame({'dis_to_dest':[dis_to_dest], 'dis_to_start':[dis_to_start], \n",
    "        'trace_mean_speed':[trace_mean_speed], 'label':[total_seconds]})\n",
    "\n",
    "    return feature_temp   \n",
    "\n",
    "def get_train_data(route_order_info, route,start_longitude,start_latitude,dest_longitude,dest_latitude):\n",
    "    order_list = route_order_info['loadingOrder'].unique()\n",
    "    print(route, order_list.shape)\n",
    "    \n",
    "    data_grouped = route_order_info.groupby('loadingOrder')\n",
    "\n",
    "    train_data = Parallel(n_jobs=8)(delayed(handle_train_data)\n",
    "                                    (group,start_longitude,start_latitude,dest_longitude,dest_latitude)\n",
    "                                    for name, group in tqdm(data_grouped))\n",
    "#     for name, group in tqdm(data_grouped):\n",
    "#         handle_train_data(group,start_longitude,start_latitude,dest_longitude,dest_latitude)\n",
    "#         break\n",
    "    train_data = pd.concat(train_data)\n",
    "    if (train_data.shape[0] < 10):\n",
    "        for i in range(5):\n",
    "            train_data = pd.concat([train_data,train_data])\n",
    "    \n",
    "    return train_data.reset_index(drop=True)\n",
    "\n",
    "def get_test_data(order,start_longitude,start_latitude,dest_longitude,dest_latitude):\n",
    "    order_info_set = test_data_origin[test_data_origin['loadingOrder'] == order]\n",
    "    order_info_set['timestamp'] = pd.to_datetime(order_info_set['timestamp'], infer_datetime_format=True)\n",
    "    tail_info = order_info_set.tail(1).values.tolist()[0]\n",
    "    head_info = order_info_set.head(1).values.tolist()[0]\n",
    "\n",
    "    dis_to_dest = haversine(tail_info[2],tail_info[3],dest_longitude,dest_latitude)\n",
    "\n",
    "    dis_to_start = haversine(tail_info[2],tail_info[3],start_longitude,start_latitude)\n",
    "\n",
    "    trace_dis = haversine(head_info[2],head_info[3],tail_info[2],tail_info[3])\n",
    "\n",
    "    trace_time=(pd.Timedelta(tail_info[1]-head_info[1])).seconds\n",
    "    if trace_time==0:\n",
    "        trace_time=1\n",
    "    trace_mean_speed=trace_dis*3.6/trace_time\n",
    "\n",
    "    feature_temp = pd.DataFrame({'dis_to_dest':[dis_to_dest], 'dis_to_start':[dis_to_start], \n",
    "        'trace_mean_speed':[trace_mean_speed]})\n",
    "\n",
    "    return feature_temp\n",
    "def mse_score_eval(preds, valid):\n",
    "    labels = valid.get_label()\n",
    "    scores = mean_squared_error(y_true=labels, y_pred=preds)\n",
    "    return 'mse_score', scores, True\n",
    "def train_model(x, y, seed=981125, is_shuffle=True):\n",
    "    train_pred = np.zeros((x.shape[0], ))\n",
    "    n_splits = min(5, x.shape[0])\n",
    "    # Kfold\n",
    "    fold = KFold(n_splits=n_splits, shuffle=is_shuffle, random_state=seed)\n",
    "    kf_way = fold.split(x)\n",
    "    # params\n",
    "    params = {\n",
    "        'learning_rate': 0.01,\n",
    "        'boosting_type': 'gbdt',\n",
    "        'objective': 'regression',\n",
    "        'num_leaves': 36,\n",
    "        'feature_fraction': 0.6,\n",
    "        'bagging_fraction': 0.7,\n",
    "        'bagging_freq': 6,\n",
    "        'seed': 8,\n",
    "        'bagging_seed': 1,\n",
    "        'feature_fraction_seed': 7,\n",
    "        'min_data_in_leaf': 25,\n",
    "        'nthread': 8,\n",
    "        'verbose': 1,\n",
    "    }\n",
    "    # train\n",
    "    for n_fold, (train_idx, valid_idx) in enumerate(kf_way, start=1):\n",
    "        train_x, train_y = x.iloc[train_idx], y.iloc[train_idx]\n",
    "        valid_x, valid_y = x.iloc[valid_idx], y.iloc[valid_idx]\n",
    "        # 数据加载\n",
    "        n_train = lgb.Dataset(train_x, label=train_y)\n",
    "        n_valid = lgb.Dataset(valid_x, label=valid_y)\n",
    "        clf = lgb.train(\n",
    "            params=params,\n",
    "            train_set=n_train,\n",
    "            num_boost_round=3000,\n",
    "            valid_sets=[n_valid],\n",
    "            early_stopping_rounds=100,\n",
    "            verbose_eval=100,\n",
    "            feval=mse_score_eval\n",
    "        )\n",
    "        train_pred[valid_idx] = clf.predict(valid_x, num_iteration=clf.best_iteration)\n",
    "    return clf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 14%|█▎        | 3/22 [00:02<00:13,  1.45it/s]\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-92-b595c96787fc>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m     10\u001b[0m     \u001b[0mdest_latitude\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mport_data\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mdest_port\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'LATITUDE'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     11\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 12\u001b[1;33m     \u001b[0mroute_order_info\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mget_train_route_order_data\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mroute\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     13\u001b[0m     \u001b[0mtrain_data\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mget_train_data\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mroute_order_info\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mroute\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mstart_longitude\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mstart_latitude\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mdest_longitude\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mdest_latitude\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     14\u001b[0m \u001b[1;31m#     print(train_data)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m<ipython-input-89-c774a6673d89>\u001b[0m in \u001b[0;36mget_train_route_order_data\u001b[1;34m(route)\u001b[0m\n\u001b[0;32m      5\u001b[0m     \u001b[1;32mif\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      6\u001b[0m         \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"error == \"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mroute\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 7\u001b[1;33m     \u001b[0mdata\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'timestamp'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mto_datetime\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'timestamp'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minfer_datetime_format\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      8\u001b[0m     \u001b[1;32mreturn\u001b[0m \u001b[0mdata\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\Program\\Anaconda\\envs\\AI\\lib\\site-packages\\pandas\\util\\_decorators.py\u001b[0m in \u001b[0;36mwrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m    206\u001b[0m                 \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    207\u001b[0m                     \u001b[0mkwargs\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mnew_arg_name\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnew_arg_value\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 208\u001b[1;33m             \u001b[1;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    209\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    210\u001b[0m         \u001b[1;32mreturn\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\Program\\Anaconda\\envs\\AI\\lib\\site-packages\\pandas\\core\\tools\\datetimes.py\u001b[0m in \u001b[0;36mto_datetime\u001b[1;34m(arg, errors, dayfirst, yearfirst, utc, box, format, exact, unit, infer_datetime_format, origin, cache)\u001b[0m\n\u001b[0;32m    774\u001b[0m         \u001b[0mcache_array\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_maybe_cache\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0marg\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mformat\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcache\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mconvert_listlike\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    775\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mcache_array\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mempty\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 776\u001b[1;33m             \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0marg\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmap\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcache_array\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    777\u001b[0m         \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    778\u001b[0m             \u001b[0mvalues\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mconvert_listlike\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0marg\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_values\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mformat\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\Program\\Anaconda\\envs\\AI\\lib\\site-packages\\pandas\\core\\series.py\u001b[0m in \u001b[0;36mmap\u001b[1;34m(self, arg, na_action)\u001b[0m\n\u001b[0;32m   3826\u001b[0m         \u001b[0mdtype\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mobject\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   3827\u001b[0m         \"\"\"\n\u001b[1;32m-> 3828\u001b[1;33m         \u001b[0mnew_values\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msuper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_map_values\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0marg\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mna_action\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mna_action\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   3829\u001b[0m         \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_constructor\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnew_values\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mindex\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__finalize__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   3830\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\Program\\Anaconda\\envs\\AI\\lib\\site-packages\\pandas\\core\\base.py\u001b[0m in \u001b[0;36m_map_values\u001b[1;34m(self, mapper, na_action)\u001b[0m\n\u001b[0;32m   1275\u001b[0m                 \u001b[0mvalues\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1276\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1277\u001b[1;33m             \u001b[0mindexer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmapper\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_indexer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   1278\u001b[0m             \u001b[0mnew_values\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0malgorithms\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtake_1d\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmapper\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_values\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mindexer\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1279\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mD:\\Program\\Anaconda\\envs\\AI\\lib\\site-packages\\pandas\\core\\indexes\\base.py\u001b[0m in \u001b[0;36mget_indexer\u001b[1;34m(self, target, method, limit, tolerance)\u001b[0m\n\u001b[0;32m   2968\u001b[0m             \u001b[1;32mreturn\u001b[0m \u001b[0mensure_platform_int\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrepeat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msize\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   2969\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2970\u001b[1;33m         \u001b[0mpself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mptarget\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_maybe_promote\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtarget\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   2971\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0mpself\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mself\u001b[0m \u001b[1;32mor\u001b[0m \u001b[0mptarget\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mtarget\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   2972\u001b[0m             return pself.get_indexer(\n",
      "\u001b[1;32mD:\\Program\\Anaconda\\envs\\AI\\lib\\site-packages\\pandas\\core\\indexes\\base.py\u001b[0m in \u001b[0;36m_maybe_promote\u001b[1;34m(self, other)\u001b[0m\n\u001b[0;32m   4823\u001b[0m         \u001b[1;32mfrom\u001b[0m \u001b[0mpandas\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mDatetimeIndex\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   4824\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 4825\u001b[1;33m         \u001b[1;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minferred_type\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m\"date\"\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mother\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mDatetimeIndex\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   4826\u001b[0m             \u001b[1;32mreturn\u001b[0m \u001b[0mDatetimeIndex\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mother\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   4827\u001b[0m         \u001b[1;32melif\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minferred_type\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m\"boolean\"\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "for route in tqdm(test_order_belong_to_trace):\n",
    "    ports = route.split(\"-\")\n",
    "    start_port = ports[0]\n",
    "    dest_port = ports[-1]\n",
    "    start_longitude = port_data[start_port]['LONGITUDE']\n",
    "    start_latitude = port_data[start_port]['LATITUDE']\n",
    "    dest_longitude = port_data[dest_port]['LONGITUDE']\n",
    "    dest_latitude = port_data[dest_port]['LATITUDE']\n",
    "    \n",
    "    route_order_info = get_train_route_order_data(route)\n",
    "    train_data = get_train_data(route_order_info, route,start_longitude,start_latitude,dest_longitude,dest_latitude)\n",
    "#     print(train_data)\n",
    "    \n",
    "    features = [c for c in train_data.columns if c not in ['loadingOrder', 'label']]\n",
    "    model_by_route = train_model(train_data[features], train_data['label'])\n",
    "    \n",
    "    for order in test_order_belong_to_trace[route]:\n",
    "        test_order_data = get_test_data(order,start_longitude,start_latitude,dest_longitude,dest_latitude)\n",
    "\n",
    "        res = model_by_route.predict(test_order_data[features], num_iteration=model_by_route.best_iteration)\n",
    "        test_data_origin.loc[test_data_origin['loadingOrder'] == order, 'ETA'] = (test_data_origin[test_data_origin['loadingOrder'] == order]['onboardDate'] + pd.Timedelta(seconds=res[0])).apply(lambda x:x.strftime('%Y/%m/%d  %H:%M:%S'))\n",
    "        \n",
    "test_data_origin['creatDate'] = pd.datetime.now().strftime('%Y/%m/%d  %H:%M:%S')\n",
    "test_data_origin['timestamp'] = test_data_origin['temp_timestamp']\n",
    "\n",
    "result = test_data_origin[['loadingOrder', 'timestamp', 'longitude', 'latitude', 'carrierName', 'vesselMMSI', 'onboardDate', 'ETA', 'creatDate']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "result.to_csv(result_path, index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.7.6 64-bit ('AI': conda)",
   "language": "python",
   "name": "python37664bitaiconda6859e03b37c34f0182c9bde8073269f7"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
